{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patient_id</th>\n",
       "      <th>diagnosis</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>22</td>\n",
       "      <td>COPD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>COPD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>Bronchiolitis</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>22</td>\n",
       "      <td>COPD</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3</td>\n",
       "      <td>Bronchiolitis</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   patient_id      diagnosis\n",
       "0          22           COPD\n",
       "1           2           COPD\n",
       "2           8  Bronchiolitis\n",
       "3          22           COPD\n",
       "4           3  Bronchiolitis"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the raw predictions; several rows may share the same patient_id.\n",
    "pred_df = pd.read_csv('../data/pred.csv')\n",
    "pred_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 [('COPD', 23)]\n",
      "2 [('COPD', 5)]\n",
      "3 [('Healthy', 2), ('Pneumonia', 2), ('Bronchiolitis', 1), ('COPD', 1)]\n",
      "4 [('URTI', 1)]\n",
      "5 [('Bronchiolitis', 1), ('Pneumonia', 1)]\n",
      "6 [('Pneumonia', 1), ('COPD', 1)]\n",
      "7 [('COPD', 5)]\n",
      "8 [('Bronchiolitis', 1)]\n",
      "9 [('Pneumonia', 1)]\n",
      "10 [('URTI', 1), ('Bronchiolitis', 1)]\n",
      "11 [('Pneumonia', 1), ('Bronchiolitis', 1)]\n",
      "12 [('COPD', 6)]\n",
      "13 [('Healthy', 1)]\n",
      "14 [('COPD', 21)]\n",
      "15 [('Healthy', 1)]\n",
      "16 [('Pneumonia', 8), ('Bronchiolitis', 2), ('URTI', 1), ('LRTI', 1)]\n",
      "17 [('Healthy', 1)]\n",
      "18 [('Healthy', 1)]\n",
      "19 [('Healthy', 1), ('COPD', 1)]\n",
      "20 [('Healthy', 1)]\n",
      "21 [('COPD', 28)]\n",
      "22 [('COPD', 27)]\n"
     ]
    }
   ],
   "source": [
    "# Majority vote: a patient's final diagnosis is the label predicted most often for them.\n",
    "# np.unique already returns the distinct ids in ascending order, so no extra sort is needed.\n",
    "id_list = np.unique(pred_df['patient_id'])\n",
    "diagnosis = []\n",
    "for patient_id in id_list:\n",
    "    diagnosis_df = pred_df[pred_df['patient_id'] == patient_id]\n",
    "    # most_common() sorts by descending count; labels with equal counts keep\n",
    "    # the order in which they first appear, so ties go to the first-seen label.\n",
    "    ans = Counter(diagnosis_df['diagnosis']).most_common()\n",
    "    print(patient_id, ans)\n",
    "    diagnosis.append(ans[0][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per patient with its most frequent predicted label.\n",
    "# The CSV is written with neither the index column nor a header row.\n",
    "submission = pd.DataFrame({'patient_id': id_list, 'diagnosis': diagnosis})\n",
    "submission.to_csv('../data/submission.csv', index=False, header=False)\n",
    "submission.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
